/* ********************************************************************************* */
/* 3 ************************* Market value of innovation ************************** */
/* ********************************************************************************* */
/* Summary   : Code builds an unbalanced firm year panel with annual financial data  */
/* 			   and calculates contribution of innovation to firm value (q)           */
/* 		       following Hall et al (2005)                                           */
/* ********************************************************************************* */



	LIBNAME cow 'C:\cow';
	LIBNAME data 'C:\data';


* Import patent value file from Noah Stoffman website https://host.kelley.iu.edu/nstoffma/;
	PROC IMPORT DATAFILE="C:\data\patent_values.csv" 
		DBMS=csv	OUT=patentnew		REPLACE;	RUN;

	data patentnew; set patentnew; year=year(idate); run;
	proc sort; by permco year; run;

* count the number of patents and the total patent value per year;
	proc means noprint data=patentnew; by permco year;	var xi; 
	output out=innov sum=tsm ; run;
	data innov; set innov; fNpats = _freq_; drop _:;  run;
	data innov; set innov; row=_n_; run;


* download CRSP/Compustat merged header file from WRDS CRSP/Compustat Merged - Fundamentals Annual;
* https://wrds-www.wharton.upenn.edu/pages/get-data/center-research-security-prices-crsp/annual-update/crspcompustat-merged/fundamentals-annual/ ; 

	data header; set data.Headercrspcomp; permno=lpermno; permco =lpermco ;keep fyear permco permno; run;
	proc sort nodupkey; by fyear permco permno; run;
	proc sql; create table inno_header as select * from innov as a left join Header as b 
	on a.permco=b.permco and a.year=b.fyear; quit; 
	proc sort data=inno_header ; by row;
	data inno; set inno_header; by row; if first.row;run;

* find list of firms with at least one patent;
	data inno_permno; set inno; atleast1patent=1; if 1991<=year;
	keep permno atleast1patent; proc sort nodupkey; by permno; run;


* get patent info 1, 2, 3 years before and after;	
	proc sort data=inno; by permno descending year; run;
	data temp; set inno; if permno~=.;
	l_permno=lag(permno); l2_permno=lag2(permno); l3_permno=lag3(permno); 
	l_year=lag(year); l2_year=lag2(year); l3_year=lag3(year); 
	l_tsm=lag(tsm); l2_tsm=lag2(tsm); l3_tsm=lag3(tsm); 
	l_fnpats=lag(fnpats); l2_fnpats=lag2(fnpats); l3_fnpats=lag3(fnpats); 
	run;
 	data temp;set temp;
	if l_permno=permno and l_year-year=1 then do; tsm_1=l_tsm; fnpats_1=l_fnpats; end;
	if l2_permno=permno and l2_year-year=2 then do; tsm_2=l2_tsm; fnpats_2=l2_fnpats; end;
	if l3_permno=permno and l3_year-year=3 then do; tsm_3=l3_tsm; fnpats_3=l3_fnpats; end;
	drop l_: l2_: l3_:;
	tsm_01=tsm+tsm_1;tsm_012=tsm+tsm_1+tsm_2;
	fnpats_01=fnpats+fnpats_1;fnpats_012=fnpats+fnpats_1+fnpats_2;
	run; 

	
* Notes: Use firm-year panel from mvc_all data file created in SAS program file mvc.sas;

	* get patent data above into firm-year panel from mvc_all file;
	proc sql; create table mvc_inno as select * from mvc_all as a left join temp as b 
	on a.permno=b.permno and a.fyear=b.year; quit; 
	proc sort data=data.comp; by gvkey fyear; run;

* get R&D and other compustat data and calculate Total Q measure following Peters & Taylor (2017);

	* RD and missing RD;
	data funda; set data.funda; * /wrds/comp/sasdata/naa;
	if datafmt="STD" and indfmt="INDL" and popsrc="D" and consol="C"; drop datafmt indfmt popsrc consol CURCD; 
	sic2=floor(sich/100); 
	if xrd=. then d_rdmiss=1; else if xrd~=. then d_rdmiss=0; 
	rd=xrd;
	keep rd gvkey fyear sic2 d_rdmiss;
	run; 
	proc sort; by fyear sic2; run;
	data a; set funda; if rd~=.; run;
	proc means noprint data=a; var rd; by  fyear sic2;
	output out=indavg(drop=_:) mean=indavgrd median=indmedrd; run;
	data data.rd; merge funda indavg; by  fyear sic2; run;
	data rd; set data.rd; gvkey1=gvkey*1; drop gvkey; rename gvkey1=gvkey; proc sort; by gvkey fyear; run; 

	* compustat data generated from SAS program compustat.sas;
	data comp; merge data.comp(in=in1) rd; permno=lpermno; by gvkey fyear; if in1; 
	totalq=(mktcap+ttldebt-act)/(PPE+INTAN); 
	keep sic2 d_rd rd d_rdmiss indavgrd indmedrd sales q totalq capx assets permno fyear employees; run; 
	proc sql; create table mvc_inno1 as select * from mvc_inno as a left join comp as b 
	on a.permno=b.permno and a.fyear=b.fyear; quit; 
	proc sort; by permno fyear descending employees; run;
	data mvc_inno1; set mvc_inno1; by permno fyear ; if first.permno or first.fyear;run;
	proc sql; create table mvc_inno2 as select * from mvc_inno1 as a left join inno_permno as b 
	on a.permno=b.permno ; quit; 


	* get patent cites from Noah Stoffman website https://host.kelley.iu.edu/nstoffma/;
	data patents;
	infile "C:\data\patents.csv" lrecl=32767 dlm="," missover dsd firstobs=2;
	input patnum	fdate	idate	pdate permno	class	subclass	ncites	xi; 
	informat patnum	$20. fdate mmddyy10.	idate	mmddyy10. pdate mmddyy10. permno $5.	class $5.	subclass $5.	ncites 10.	xi 10.;
	keep idate 	permno	class	ncites	xi;
	format idate date9.;
	if permno~=.;
	run;
	data comp; set data.comp; permno=lpermno; keep id gvkey fyear permno cusip fyenddt; run;
	proc sql; create table patents_year as select * from comp as a left join patents as b 
	on a.permno=b.permno and a.fyenddt-365<=b.idate<=a.fyenddt; quit; 
	proc sort data=patents_year; by id gvkey fyear permno cusip fyenddt; run;
	proc means noprint data=patents_year; by id gvkey fyear permno cusip fyenddt; var ncites	xi; 
	output out=cites  sum= patcites patvalue; run;
	data cow.cites; set cites;  if patcites~=. then patcount=_freq_; drop _type_ _freq_; if permno~=.; run;



	proc sql; create table mvc_inno3 as select * from mvc_inno2 as a left join cow.cites as b 
	on a.permno=b.permno and a.fyear=b.fyear; quit; 
	proc sort data= mvc_inno3; by permno fyear descending patcites ; run;
	data mvc_inno3; set mvc_inno3; by permno fyear ; if first.permno or first.fyear;run;

	data mvc_inno4; set mvc_inno3; drop sic2; sic1=floor(sic/1000);
	if ((incorp ="DE" and fyear>2000)
	 or (incorp ="OK" and fyear>2001)
	 or (incorp ="MO" and fyear>2003)
	 or (incorp ="KS" and fyear>2005)
	 or (incorp ="TX" and fyear>2006)
	 or (incorp ="NV" and fyear>2007)) and (tsm=.) then flag=1; 
	if rd~=0 then patentrd=fnpats/rd; 
	citepatent=patcites/fnpats; 
	if d_rdmiss=1 then rdn=indmedrd; else if d_rdmiss=0 then rdn=rd; 
 	run;

	PROC EXPORT DATA= mvc_inno4 OUTFILE= "C:\cow\innotest.dta" DBMS=STATA REPLACE;RUN;
	
